Objective: 1) Identify all closely related genetic pairs (< x core variants). 2) Filter pairs that have mortality metadata (survived <-> died, survived <-> survived, or died <-> died). 3) Extract all mutations/genes corresponding to these different “mortality switches/non-switches”. 4) Identify all the mutations specific to survived <-> died switches (<=> mut_survived<->died - mut_survived<->survived (- mut_died<->died ?)). 5) From all closely related paired isolates, keep those that have already been phenotyped. 6) Calculate changes (delta) for each measured phenotype (all GC and PI parameters). 7) Investigate differences in the delta of all parameter changes for survived <-> died VS survived <-> survived (VS died <-> died): Is there a systematic significant change between these parameters (absolute delta)? Are these changes directional (e.g. a decrease in growth rate or AUC_cell_death)? 8) Accessory question: are there phenotypic changes not associated with detected genotypic changes -> should we investigate structural variants (Nanopore, PacBio?)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.0 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(here)
## here() starts at /home/rguerillot/Documents/Travail/Abdou_project/Staph_infection_project/github_analysis/VANANZ_phenotypes
# Knitr configuration: use the project root located by here() as the root
# directory for chunk evaluation, and echo the chunk source in the output.
knitr::opts_knit$set(root.dir = here())
knitr::opts_chunk$set(echo = TRUE)
# Report the directory that here() resolved.
print(paste("My working directory is:", here()))
## [1] "My working directory is: /home/rguerillot/Documents/Travail/Abdou_project/Staph_infection_project/github_analysis/VANANZ_phenotypes"
# Run the project script Functions/all_functions.R (its contents are not shown
# here). The path is relative, so it is resolved against the current working
# directory.
source("Functions/all_functions.R")
# Load the pairwise distance matrix (tab-separated file). The first column is
# used as row names below; as.matrix() on a data frame that contains a
# non-numeric column yields a character matrix, which is why the distances are
# converted with as.integer() after melting.
# NOTE(review): read.csv() runs with its default check.names = TRUE, so any
# isolate name that is not a syntactic R name would be altered in the column
# names but not in the row names -- confirm the identifiers are unaffected.
snp.dist.mat <- read.csv(
  "Data_analysis/Genetic_pairs_analysis/VANANZ_core.aln.dist.mat",
  sep = "\t"
) %>%
  as.matrix()
# Put the first column as row names, then remove it. A negative index replaces
# the hard-coded 2:845 range, so the code no longer depends on the exact number
# of isolates in the file.
row.names(snp.dist.mat) <- snp.dist.mat[, 1]
snp.dist.mat <- snp.dist.mat[, -1, drop = FALSE]
# Attach harrietr with library(): a missing package then stops the script with
# an error, whereas require() only warns and returns FALSE.
library(harrietr)
## Registered S3 method overwritten by 'treeio':
## method from
## root.phylo ape
# Reshape the distance matrix to long format, one row per isolate pair with
# the columns iso1, iso2 and dist, and store the distance as an integer.
snp.dist.df <- harrietr::melt_dist(snp.dist.mat) %>%
  mutate(dist = as.integer(dist)) %>%
  # Remove the reference whichever side of the pair it is on: testing iso1
  # alone would keep every pair in which the reference appears as iso2.
  filter(iso1 != "Reference", iso2 != "Reference")
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Histogram of all pairwise distances (1000 breaks).
hist(snp.dist.df$dist, breaks = 1000)
# Same data with finer breaks, x axis limited to distances between 0 and 100.
hist(snp.dist.df$dist, breaks = 100000, xlim = c(0,100))
# Make the pair table symmetrical: append a copy of every pair with iso1 and
# iso2 swapped, so each pair is listed in both orientations (A-B and B-A).
# NOTE(review): close_strain.df is not created in the visible part of this
# document -- presumably snp.dist.df filtered on a distance threshold; confirm.
reversed_pairs.df <- close_strain.df %>%
  mutate(isoA = iso1, iso1 = iso2, iso2 = isoA) %>%
  select(iso1, iso2, dist)
close_strain_symetrical.df <- rbind(close_strain.df, reversed_pairs.df)
# Table read from Saureus_CC_to_ST.csv; it is merged into the sample metadata
# on the "ST" column.
ST_to_CC.df <- read.csv("Ideas_Grant_2020_analysis/Raw_data/Saureus_CC_to_ST.csv")
# !!! Use the corrected mortality metadata. The following changes have been made to the sample metadata table:
# 1) all isolates recovered from patients who survived are now labeled "survived" (previously only the first isolate)
# 2) only the last isolate from patients who died is labeled "died" (previously only the first)
# Read the corrected sample metadata once and add the columns of ST_to_CC.df,
# matched on "ST"; all.x = TRUE keeps samples whose ST has no match.
sample_meta.df <- read.csv("Ideas_Grant_2020_analysis/Raw_data/strain_metadata_corrected_mortality_with_controls.csv") %>%
  merge(., ST_to_CC.df, by = "ST", all.x = TRUE)
# Two copies of the metadata with every column prefixed "iso1_" / "iso2_", so
# that one copy can be joined to each member of an isolate pair. setNames()
# replaces select_all(funs(...)), because funs() is deprecated since
# dplyr 0.8.0.
sample_meta_iso1.df <- setNames(
  sample_meta.df,
  paste0("iso1_", names(sample_meta.df))
)
sample_meta_iso2.df <- setNames(
  sample_meta.df,
  paste0("iso2_", names(sample_meta.df))
)
# Annotate both members of every pair with their metadata. merge() keeps only
# the pairs whose iso1 and iso2 are both found in the metadata tables.
pairs_with_iso1_meta.df <- merge(
  close_strain_symetrical.df,
  sample_meta_iso1.df,
  by.x = "iso1",
  by.y = "iso1_sample_id"
)
pairs_with_both_meta.df <- merge(
  pairs_with_iso1_meta.df,
  sample_meta_iso2.df,
  by.x = "iso2",
  by.y = "iso2_sample_id"
)
# Pair-level CC: the CC of iso1, falling back to the CC of iso2 when missing.
close_strain_symetrical.df <- pairs_with_both_meta.df %>%
  mutate(
    CC = ifelse(
      is.na(iso1_CC),
      yes = as.character(iso2_CC),
      no = as.character(iso1_CC)
    )
  )
# GC parameters: drop the rows of the CONTROL strain group, then keep
# sample_id and every column whose name ends with "OD".
sample_GC_param.df <- read.csv("Ideas_Grant_2020_analysis/Processed_data/Growth_curves/processed_median_parameters_GC.csv")
sample_GC_param.df <- sample_GC_param.df %>%
  filter(strain_group != "CONTROL") %>%
  select(sample_id, ends_with("OD"))
# PI parameters: same treatment, keeping the columns ending with "death".
sample_PI_param.df <- read.csv("Ideas_Grant_2020_analysis/Processed_data/PI_curves/processed_median_parameters_PI.csv")
sample_PI_param.df <- sample_PI_param.df %>%
  filter(strain_group != "CONTROL") %>%
  select(sample_id, ends_with("death"))
# Merge the PI and GC parameters; merge() joins on the columns the two tables
# have in common.
sample_param.df <- merge(x = sample_GC_param.df, y = sample_PI_param.df)
# Merge the iso1 and iso2 phenotype data with close_strain_symetrical.df.
# Every phenotype column is first prefixed "iso1_" / "iso2_"; setNames()
# replaces select_all(funs(...)), because funs() is deprecated since
# dplyr 0.8.0.
sample_param_iso1.df <- setNames(
  sample_param.df,
  paste0("iso1_", names(sample_param.df))
)
sample_param_iso2.df <- setNames(
  sample_param.df,
  paste0("iso2_", names(sample_param.df))
)
# merge() keeps only the pairs for which both isolates have phenotype data.
close_strain_symetrical_with_pheno.df <- close_strain_symetrical.df %>%
  merge(sample_param_iso1.df, by.x = "iso1", by.y = "iso1_sample_id") %>%
  merge(sample_param_iso2.df, by.x = "iso2", by.y = "iso2_sample_id")
# Phenotype change within each pair, always computed as iso2 relative to iso1:
#   delta_<param>  = iso2_<param> - iso1_<param>
#   log2fc_<param> = log2(iso2_<param> / iso1_<param>)
# log2() returns NaN for a negative ratio, which is what the "NaNs produced"
# warnings report.
pheno_params <- c(
  "time_of_max_rate", "max_rate", "doubling_time", "AUC", "time_of_max",
  "time_of_min", "max", "min", "end_point"
)
# Column order: the OD parameters first, then the death parameters.
pheno_columns <- c(paste0(pheno_params, "_OD"), paste0(pheno_params, "_death"))

close_strain_symetrical_with_pheno_changes.df <- close_strain_symetrical_with_pheno.df
# All delta_* columns are appended first ...
for (pheno in pheno_columns) {
  close_strain_symetrical_with_pheno_changes.df[[paste0("delta_", pheno)]] <-
    close_strain_symetrical_with_pheno.df[[paste0("iso2_", pheno)]] -
    close_strain_symetrical_with_pheno.df[[paste0("iso1_", pheno)]]
}
# ... followed by all log2fc_* columns.
for (pheno in pheno_columns) {
  close_strain_symetrical_with_pheno_changes.df[[paste0("log2fc_", pheno)]] <-
    log2(
      close_strain_symetrical_with_pheno.df[[paste0("iso2_", pheno)]] /
        close_strain_symetrical_with_pheno.df[[paste0("iso1_", pheno)]]
    )
}
## Warning: NaNs produced
## Warning: NaNs produced
## Warning: NaNs produced
# Label every pair with its mortality "switch", written as
# <iso1 outcome>-<iso2 outcome>. Pairs matching none of the four combinations
# (for example a missing outcome) get NA.
close_strain_symetrical_with_pheno_changes.df <- close_strain_symetrical_with_pheno_changes.df %>%
  mutate(
    switches = case_when(
      iso1_mortality == "Survived" & iso2_mortality == "Died" ~ "Survived-Died",
      iso1_mortality == "Survived" & iso2_mortality == "Survived" ~ "Survived-Survived",
      iso1_mortality == "Died" & iso2_mortality == "Died" ~ "Died-Died",
      iso1_mortality == "Died" & iso2_mortality == "Survived" ~ "Died-Survived",
      TRUE ~ NA_character_
    )
  ) # %>%
#select(iso1, iso2, iso1_mortality, iso2_mortality, switches)
# One violin plot per delta_* column of the symmetrical pair table, grouped by
# mortality switch. stat_compare_means() compares each group with
# "Survived-Survived" (method "wilcox.test") and prints significance symbols.
# NOTE(review): ggviolin() and stat_compare_means() are ggpubr functions, but
# no library(ggpubr) call is visible here -- presumably the package is attached
# by Functions/all_functions.R; confirm.
delta_columns <- grep(
  "delta",
  colnames(close_strain_symetrical_with_pheno_changes.df),
  value = TRUE
)
for (delta_column in delta_columns) {
  violin_plot <- ggviolin(
    data = close_strain_symetrical_with_pheno_changes.df,
    x = "switches",
    y = delta_column,
    fill = "switches",
    add = "jitter"
  ) +
    theme_bw() +
    theme(legend.position = "none") +
    stat_compare_means(
      ref.group = "Survived-Survived",
      method = "wilcox.test",
      label = "p.signif"
    )
  print(violin_plot)
}
# Collapse the two orientations of a pair. The key is the sorted combination
# of both isolate names and the switch label, so A-B and B-A share a key when
# they carry the same label, and distinct() keeps the first row of each key.
# "Survived-Died" and "Died-Survived" are different labels, so both
# orientations of a mixed-outcome pair are kept.
close_strain_symetrical_with_pheno_changes_no_dup.df <- close_strain_symetrical_with_pheno_changes.df %>%
  rowwise() %>%
  mutate(key = paste(sort(c(iso1, iso2, switches)), collapse = "")) %>%
  # rowwise() was only needed to build the key one row at a time; drop it so
  # that distinct() and every later verb operate on the whole table.
  ungroup() %>%
  #select(iso1, iso2, switches, key)
  distinct(key, .keep_all = TRUE)
# One violin plot per delta_* column of the de-duplicated pair table, grouped
# by mortality switch and compared with "Survived-Survived" (method
# "wilcox.test").
no_dup_delta_columns <- grep(
  "delta",
  colnames(close_strain_symetrical_with_pheno_changes_no_dup.df),
  value = TRUE
)
for (delta_column in no_dup_delta_columns) {
  print(
    ggviolin(
      data = close_strain_symetrical_with_pheno_changes_no_dup.df,
      x = "switches",
      y = delta_column,
      fill = "switches",
      add = "jitter"
    ) +
      theme_bw() +
      theme(legend.position = "none") +
      stat_compare_means(
        ref.group = "Survived-Survived",
        method = "wilcox.test",
        label = "p.signif"
      )
  )
}
# Plot only the Survived-Survived and Survived-Died pairs. The filtered table
# overwrites the de-duplicated one, so everything that uses it afterwards sees
# these two groups only.
close_strain_symetrical_with_pheno_changes_no_dup.df <- filter(
  close_strain_symetrical_with_pheno_changes_no_dup.df,
  switches %in% c("Survived-Survived", "Survived-Died")
)
# One box plot per delta_* column, compared with "Survived-Survived".
survived_delta_columns <- grep(
  "delta",
  colnames(close_strain_symetrical_with_pheno_changes_no_dup.df),
  value = TRUE
)
for (delta_column in survived_delta_columns) {
  delta_boxplot <- ggboxplot(
    data = close_strain_symetrical_with_pheno_changes_no_dup.df,
    x = "switches",
    y = delta_column,
    fill = "switches",
    add = "jitter"
  )
  delta_boxplot <- delta_boxplot +
    theme_bw() +
    theme(legend.position = "none") +
    stat_compare_means(
      ref.group = "Survived-Survived",
      method = "wilcox.test",
      label = "p.signif"
    )
  print(delta_boxplot)
}
# One box plot per log2fc_* column of the de-duplicated pair table, grouped by
# mortality switch and compared with "Survived-Survived" (method
# "wilcox.test"). The column names are now taken from the table that is
# actually plotted rather than from a different data frame. Rows with a
# non-finite log2 fold change are dropped by ggplot2 with a warning.
log2fc_columns <- grep(
  "log2fc",
  colnames(close_strain_symetrical_with_pheno_changes_no_dup.df),
  value = TRUE
)
for (log2fc_column in log2fc_columns) {
  # Named log2fc_boxplot rather than `t`, which would mask base::t().
  log2fc_boxplot <- ggboxplot(
    data = close_strain_symetrical_with_pheno_changes_no_dup.df,
    x = "switches",
    y = log2fc_column,
    fill = "switches",
    add = "jitter"
  ) +
    theme_bw() +
    theme(legend.position = "none") +
    stat_compare_means(
      ref.group = "Survived-Survived",
      method = "wilcox.test",
      label = "p.signif"
    )
  print(log2fc_boxplot)
}
## Warning: Removed 56 rows containing non-finite values (stat_boxplot).
## Warning: Removed 56 rows containing non-finite values (stat_compare_means).
## Warning: Removed 29 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing non-finite values (stat_boxplot).
## Warning: Removed 9 rows containing non-finite values (stat_compare_means).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_compare_means).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).
## Warning: Removed 15 rows containing non-finite values (stat_compare_means).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_compare_means).
## Warning: Removed 35 rows containing missing values (geom_point).
# One box plot per delta_* column, restricted to CC1, CC22 and CC8 and faceted
# by CC. The subset is computed once, outside the loop, and the column names
# are taken from the table that is actually plotted.
# NOTE(review): the recorded output shows stat_compare_means() failing with
# 'argument "x" is missing' for these plots -- presumably some CC panels hold
# only one of the two switch groups, so no test can be computed; confirm.
cc_subset.df <- close_strain_symetrical_with_pheno_changes_no_dup.df %>%
  filter(CC %in% c("CC1", "CC22", "CC8"))
cc_delta_columns <- grep("delta", colnames(cc_subset.df), value = TRUE)
for (delta_column in cc_delta_columns) {
  # The stray trailing comma after facet.by (an empty argument) is removed.
  cc_delta_boxplot <- ggboxplot(
    data = cc_subset.df,
    x = "switches",
    y = delta_column,
    fill = "switches",
    add = "jitter",
    facet.by = "CC"
  ) +
    theme_bw() +
    theme(legend.position = "none") +
    stat_compare_means(
      ref.group = "Survived-Survived",
      method = "wilcox.test",
      label = "p.signif"
    )
  print(cc_delta_boxplot)
}
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
# One box plot per log2fc_* column, restricted to CC1, CC22 and CC8 and
# faceted by CC. The subset is computed once, outside the loop, and the column
# names are taken from the table that is actually plotted.
# NOTE(review): the recorded output shows stat_compare_means() failing with
# 'argument "x" is missing' for these plots -- presumably some CC panels hold
# only one of the two switch groups, so no test can be computed; confirm.
cc_log2fc_subset.df <- close_strain_symetrical_with_pheno_changes_no_dup.df %>%
  filter(CC %in% c("CC1", "CC22", "CC8"))
cc_log2fc_columns <- grep(
  "log2fc",
  colnames(cc_log2fc_subset.df),
  value = TRUE
)
for (log2fc_column in cc_log2fc_columns) {
  # The stray trailing comma after facet.by (an empty argument) is removed.
  cc_log2fc_boxplot <- ggboxplot(
    data = cc_log2fc_subset.df,
    x = "switches",
    y = log2fc_column,
    fill = "switches",
    add = "jitter",
    facet.by = "CC"
  ) +
    theme_bw() +
    theme(legend.position = "none") +
    stat_compare_means(
      ref.group = "Survived-Survived",
      method = "wilcox.test",
      label = "p.signif"
    )
  print(cc_log2fc_boxplot)
}
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 26 rows containing non-finite values (stat_boxplot).
## Warning: Removed 26 rows containing non-finite values (stat_compare_means).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_compare_means).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing non-finite values (stat_compare_means).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
## Warning: Removed 12 rows containing non-finite values (stat_compare_means).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 22 rows containing non-finite values (stat_boxplot).
## Warning: Removed 22 rows containing non-finite values (stat_compare_means).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
# One violin plot per delta_* column for the pairs at distance 0, with points
# labelled by iso1. The subset is computed once, outside the loop, and the
# column names are taken from the table that is actually plotted.
# NOTE(review): the recorded output shows stat_compare_means() failing with
# 'argument "x" is missing' for these plots -- presumably the dist == 0 subset
# lacks one of the groups being compared; confirm.
identical_pairs.df <- close_strain_symetrical_with_pheno_changes_no_dup.df %>%
  filter(dist == 0)
identical_delta_columns <- grep(
  "delta",
  colnames(identical_pairs.df),
  value = TRUE
)
for (delta_column in identical_delta_columns) {
  # Named identical_violin rather than `t`, which would mask base::t().
  identical_violin <- ggviolin(
    data = identical_pairs.df,
    x = "switches",
    y = delta_column,
    fill = "switches",
    add = "jitter",
    label = "iso1"
  ) +
    theme_bw() +
    theme(legend.position = "none") +
    stat_compare_means(
      ref.group = "Survived-Survived",
      method = "wilcox.test",
      label = "p.signif"
    )
  print(identical_violin)
}
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default